# ---- Session setup ----
# Restore the saved workspace image. `combined_data` is used further down but
# is not created in the visible part of this script, so it is presumably
# restored from this file -- TODO confirm.
load(file = "C:/Users/Shreyas/Box/PhD/Twitter RA/.RData")
# NOTE(review): machine-specific absolute path; the script only runs as-is on
# a machine that has this directory.
setwd(dir = "C:/Users/Shreyas/Box/PhD/Twitter RA")

# Packages: data manipulation, string matching, plotting.
library(dplyr)
library(stringr)
library(ggplot2)
# Keyword dictionary: one named entry per topic, each a character vector of
# the words and phrases that count as a mention of that topic.
topics <- list(
  Race = c(
    "race", "racism", "racial discrimination", "racial disparities",
    "racial inequality", "ethnic", "ethnic group", "ethnicity", "minority"
  ),
  LowIncome = c(
    "poverty", "socioeconomic", "economic inequality", "income gap",
    "financial hardship", "disadvantaged", "impoverished", "underprivileged"
  ),
  Gender = c(
    "gender equality", "gender discrimination", "gender bias",
    "gender pay gap", "gender identity", "sexism", "gender stereotypes",
    "LGBTQ+"
  ),
  Immigration = c(
    "immigrants", "immigration policy", "undocumented", "refugee",
    "asylum seekers", "border control", "migration", "citizenship"
  ),
  Age = c(
    "elderly", "senior citizens", "aging population", "ageism",
    "generational", "youth", "baby boomers", "millennials"
  )
)
# Count keyword mentions in the `text` column of a data frame.
#
# Arguments:
#   data:     a data frame with a `text` column and one or more columns whose
#             names start with "Topic_".
#   keywords: character vector of words/phrases. They are matched as literal,
#             case-insensitive substrings (so "race" also matches inside
#             "embrace").
#
# Returns `data` summarised down to its "Topic_" columns; every such column
# holds the total number of keyword matches over all rows of `text`. Rows with
# a missing `text` contribute nothing to the total.
count_mentions <- function(data, keywords) {
  # The text is lower-cased before matching, so the keywords must be too
  # (otherwise "LGBTQ+" can never match). Regex metacharacters are escaped so
  # that e.g. the "+" is matched literally instead of acting as a quantifier.
  literal_keywords <- gsub(
    "([.^$\\\\|*+?{}\\[\\]()])", "\\\\\\1", tolower(keywords),
    perl = TRUE
  )
  pattern <- paste(literal_keywords, collapse = "|")

  count_hits <- function(tweet_text) {
    # With no keywords the pattern would be the empty string, which does not
    # mean "no matches"; report zero hits explicitly instead.
    if (length(keywords) == 0) {
      return(integer(length(tweet_text)))
    }
    str_count(tolower(tweet_text), pattern)
  }

  data %>%
    mutate(across(starts_with("Topic_"), ~ count_hits(text))) %>%
    summarise(across(starts_with("Topic_"), ~ sum(.x, na.rm = TRUE)))
}
# Count mentions of each topic for both accounts.
#
# `topics` is keyed by bare topic names ("Race", "LowIncome", ...), so the
# counts are built by iterating over `topics` and searching the tweet `text`
# directly. The result has one row per `screen_name` and one `Topic_<name>`
# column per topic, which is what the reshaping step selects with
# starts_with("Topic_").

# One regular expression per topic, named "Topic_<name>".
topic_patterns <- vapply(
  topics,
  function(keywords) {
    # The text is lower-cased before matching, so lower-case the keywords too,
    # and escape regex metacharacters so that e.g. "LGBTQ+" matches literally.
    literal_keywords <- gsub(
      "([.^$\\\\|*+?{}\\[\\]()])", "\\\\\\1", tolower(keywords),
      perl = TRUE
    )
    paste(literal_keywords, collapse = "|")
  },
  character(1)
)
names(topic_patterns) <- paste0("Topic_", names(topics))

mentions_count <- combined_data %>%
  group_by(screen_name) %>%
  summarise(
    # An unnamed data frame result is unpacked into one column per topic.
    as.data.frame(lapply(topic_patterns, function(pattern) {
      # Tweets with missing text contribute no matches.
      sum(str_count(tolower(text), pattern), na.rm = TRUE)
    }))
  )
# Reshape data for plotting.
# pivot_longer() comes from tidyr, so the package has to be attached before
# the reshape is attempted.
library(tidyr)

# Long format: the "Topic_" columns of `mentions_count` are stacked into a
# `Topic` (column name) / `Mentions` (value) pair.
mentions_count_long <- mentions_count %>%
  pivot_longer(
    cols = starts_with("Topic_"),
    names_to = "Topic",
    values_to = "Mentions"
  )
# Bar chart of mention counts: topics on the x-axis, bars dodged side by side
# and filled by account.
ggplot(
  data = mentions_count_long,
  mapping = aes(x = Topic, y = Mentions, fill = screen_name)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Mentions of Various Topics between PNHP and P4AHCF",
    x = "Topics",
    y = "Number of Mentions",
    fill = "Twitter Account"
  )
# Long format for plotting: stack the "Topic_" columns of `mentions_count`
# into a `Topic` (column name) / `Mentions` (value) pair.
mentions_count_long <- pivot_longer(
  mentions_count,
  cols = starts_with("Topic_"),
  names_to = "Topic",
  values_to = "Mentions"
)
# Keyword dictionary: one named entry per topic, each a character vector of
# the words and phrases that count as a mention of that topic.
topics <- list(
  Race = c(
    "race",
    "racism",
    "racial discrimination",
    "racial disparities",
    "racial inequality",
    "ethnic",
    "ethnic group",
    "ethnicity",
    "minority"
  ),
  LowIncome = c(
    "poverty",
    "socioeconomic",
    "economic inequality",
    "income gap",
    "financial hardship",
    "disadvantaged",
    "impoverished",
    "underprivileged"
  ),
  Gender = c(
    "gender equality",
    "gender discrimination",
    "gender bias",
    "gender pay gap",
    "gender identity",
    "sexism",
    "gender stereotypes",
    "LGBTQ+"
  ),
  Immigration = c(
    "immigrants",
    "immigration policy",
    "undocumented",
    "refugee",
    "asylum seekers",
    "border control",
    "migration",
    "citizenship"
  ),
  Age = c(
    "elderly",
    "senior citizens",
    "aging population",
    "ageism",
    "generational",
    "youth",
    "baby boomers",
    "millennials"
  )
)
# Count keyword mentions in the `text` column of a data frame.
#
# Arguments:
#   data:     a data frame with a `text` column and one or more columns whose
#             names start with "Topic_".
#   keywords: character vector of words/phrases. They are matched as literal,
#             case-insensitive substrings (so "race" also matches inside
#             "embrace").
#
# Returns `data` summarised down to its "Topic_" columns; every such column
# holds the total number of keyword matches over all rows of `text`. Rows with
# a missing `text` contribute nothing to the total.
count_mentions <- function(data, keywords) {
  # The text is lower-cased before matching, so the keywords must be too
  # (otherwise "LGBTQ+" can never match). Regex metacharacters are escaped so
  # that e.g. the "+" is matched literally instead of acting as a quantifier.
  literal_keywords <- gsub(
    "([.^$\\\\|*+?{}\\[\\]()])", "\\\\\\1", tolower(keywords),
    perl = TRUE
  )
  pattern <- paste(literal_keywords, collapse = "|")

  count_hits <- function(tweet_text) {
    # With no keywords the pattern would be the empty string, which does not
    # mean "no matches"; report zero hits explicitly instead.
    if (length(keywords) == 0) {
      return(integer(length(tweet_text)))
    }
    str_count(tolower(tweet_text), pattern)
  }

  data %>%
    mutate(across(starts_with("Topic_"), ~ count_hits(text))) %>%
    summarise(across(starts_with("Topic_"), ~ sum(.x, na.rm = TRUE)))
}
# Count mentions of each topic for both accounts.
#
# `topics` is keyed by bare topic names ("Race", "LowIncome", ...), so the
# counts are built by iterating over `topics` and searching the tweet `text`
# directly. The result has one row per `screen_name` and one `Topic_<name>`
# column per topic, which is what the reshaping step selects with
# starts_with("Topic_").

# One regular expression per topic, named "Topic_<name>".
topic_patterns <- vapply(
  topics,
  function(keywords) {
    # The text is lower-cased before matching, so lower-case the keywords too,
    # and escape regex metacharacters so that e.g. "LGBTQ+" matches literally.
    literal_keywords <- gsub(
      "([.^$\\\\|*+?{}\\[\\]()])", "\\\\\\1", tolower(keywords),
      perl = TRUE
    )
    paste(literal_keywords, collapse = "|")
  },
  character(1)
)
names(topic_patterns) <- paste0("Topic_", names(topics))

mentions_count <- combined_data %>%
  group_by(screen_name) %>%
  summarise(
    # An unnamed data frame result is unpacked into one column per topic.
    as.data.frame(lapply(topic_patterns, function(pattern) {
      # Tweets with missing text contribute no matches.
      sum(str_count(tolower(text), pattern), na.rm = TRUE)
    }))
  )
# Long format for plotting: stack the "Topic_" columns of `mentions_count`
# into a `Topic` (column name) / `Mentions` (value) pair.
mentions_count_long <- pivot_longer(
  mentions_count,
  cols = starts_with("Topic_"),
  names_to = "Topic",
  values_to = "Mentions"
)
library(dplyr)

# Inspect the combined tweet data: open it in the data viewer and print its
# structure.
View(combined_data)
str(combined_data)

# Rows whose `text` value already appeared in an earlier row; the first
# occurrence of each text is not included.
duplicate_tweets <- combined_data[duplicated(combined_data[["text"]]), ]
View(duplicate_tweets)
